********Clean and compile data needed for analysis from IFLS4********

cd "$ifls4rawdata"

***Clean Book 3A, which contains individuals' demographics, risk preference questions, education, work, religion
* Starts from the Book 3A cover file and attaches selected variables from each section file.
* Every merge is 1:1 on (hhid07, pid07) and stores its match status in its own _merge* variable,
* so unmatched records from either side are kept and can be inspected later.
use b3a_cov.dta, clear

*Attentiveness questions
* Prefixed with the book name; presumably to avoid a clash with cp* variables in other books - TODO confirm
rename cp1 b3acp1
rename cp2 b3acp2
rename cp3 b3acp3

*Highest level of education
merge 1:1 hhid07 pid07 using b3a_dl1.dta, gen(_mergedl1) keepus(dl06)

*Risk questions
merge 1:1 hhid07 pid07 using b3a_si.dta, gen(_mergesi) keepus(si*)

*Work information
* (tk03 was listed twice in the original keepusing list; each variable is now named once)
merge 1:1 hhid07 pid07 using b3a_tk1.dta, gen(_mergetk1) keepus(tk01 tk02 tk03 tk04 tk16d)
merge 1:1 hhid07 pid07 using b3a_tk2.dta, gen(_mergetk2) keepus(tk19ab tk24a)

*Religion and religiosity, and village safety
merge 1:1 hhid07 pid07 using b3a_tr.dta, gen(_mergetr) keepus(tr06 tr07 tr11 tr12)

*Subjective well-being
merge 1:1 hhid07 pid07 using b3a_sw.dta, gen(_mergesw) keepus(sw04 sw05)


* Park the cleaned Book 3A data in a temporary file for the final merge
tempfile book3a_clean
save "`book3a_clean'", replace

***Clean Book 3B, which contains individuals' smoking behavior
* Load the Book 3B cover file, one record per (hhid07, pid07)
use b3b_cov.dta, clear

*Smoking behavior
* Attach the km01*, km04 and km08* items; match status is recorded in _mergekm
merge 1:1 hhid07 pid07 using b3b_km.dta, keepusing(km01* km04 km08*) generate(_mergekm)

* Park the result in a temporary file for the final merge
tempfile book3b_clean
save "`book3b_clean'", replace

***Clean Book EK, which contains individuals' cognitive measures

*Easier test
use bek_ek1.dta, clear

*Only records with result == 1 are scored
keep if result == 1

*Raven's score over items ek1-ek12: one point per response equal to the answer key
generate score1 = 0
label variable score1 "cum. score Rav SPM1"
local item = 0
foreach ans in E F A D C B E B C B C E {
	local ++item
	replace score1 = score1 + 1 if ek`item' == "`ans'"
}

*Alternative Raven's score restricted to items 1-6, 11 and 12
*(the items also scored as score2 on the second test)
generate score1_a = 0
label variable score1_a "alt cum. score Rav SPM1 (same as score 2 in ek2)"
local items 1 2 3 4 5 6 11 12
local keys  E F A D C B C E
forvalues j = 1/8 {
	local q : word `j' of `items'
	local a : word `j' of `keys'
	replace score1_a = score1_a + 1 if ek`q' == "`a'"
}

*Math score over items ek13-ek17
generate mscore1 = 0
label variable mscore1 "cum. math score test 1"
local item = 12
foreach ans in B C C B C {
	local ++item
	replace mscore1 = mscore1 + 1 if ek`item' == "`ans'"
}

*Retain only the identifiers and the three scores
keep hhid07 pid07 score1 score1_a mscore1

tempfile cogmeasures1_clean
save "`cogmeasures1_clean'", replace

*Harder test
use bek_ek2.dta, clear

*Keep ones with results
keep if result == 1

*Raven's score: one point per response equal to the answer key (items 1-6, 11, 12)
gen score2 = 0
la var score2 "cum. score Rav SPM2 (same as score1_a)"
replace score2 = score2 + 1 if ek1 == "E"
replace score2 = score2 + 1 if ek2 == "F"
replace score2 = score2 + 1 if ek3 == "A"
replace score2 = score2 + 1 if ek4 == "D"
replace score2 = score2 + 1 if ek5 == "C"
replace score2 = score2 + 1 if ek6 == "B"
replace score2 = score2 + 1 if ek11 == "C"
replace score2 = score2 + 1 if ek12 == "E"

*Math score over items ek18-ek22
gen mscore2 = 0
la var mscore2 "cum. math score test 2"
replace mscore2 = mscore2 + 1 if ek18 == "B"
replace mscore2 = mscore2 + 1 if ek19 == "D"
replace mscore2 = mscore2 + 1 if ek20 == "C"
replace mscore2 = mscore2 + 1 if ek21 == "D"
replace mscore2 = mscore2 + 1 if ek22 == "B"

keep hhid07 pid07 score2 mscore2

*Combine with the easier-test scores; a person may appear in one file or in both
merge 1:1 hhid07 pid07 using "`cogmeasures1_clean'", gen(_mergecog)

*Drop records whose scores sum to zero. rowtotal() counts missing scores as 0,
*so ct == 0 exactly when every non-missing score is 0.
*NOTE(review): this also removes respondents who answered but got every item wrong - confirm this is intended.
egen ct = rowtotal(score2 mscore2 score1 score1_a mscore1)
drop if ct == 0

*If have two scores for same thing in same year, go with the score on the "harder" test first:
*use score2 when it exists, otherwise fall back to score1_a (missing if neither exists).
*(The former helper variables dum and dist were never used and were discarded by the keep below, so they are no longer created.)
gen rav_score = score2
replace rav_score = score1_a if score2 == .
la var rav_score "Cum score Raven's SPM (0-8)"

keep hhid07 pid07 rav_score

tempfile bookek_clean
save "`bookek_clean'", replace

***Clean Book K, which contains HH roster and can be used to construct HH size
* ", clear" makes the load independent of whatever is in memory, like every other load in this file
* (without it, -use- refuses to run whenever the data in memory have unsaved changes).
use bk_cov.dta, clear

*Number of people
* Household-level 1:1 merge on hhid07; match status is recorded in _mergear0
merge 1:1 hhid07 using bk_ar0.dta, gen(_mergear0) keepus(hhsize)

* Park the result in a temporary file for the household-level merge
tempfile bookk_clean
save "`bookk_clean'", replace

***Clean Book 2, which contains HH's experience of events/natural disasters, and crop information
* ", clear" makes the load independent of whatever is in memory, like every other load in this file
* (without it, -use- refuses to run whenever the data in memory have unsaved changes).
use b2_cov.dta, clear

*Experiences of natural disasters, etc
merge 1:1 hhid07 using b2_nd1.dta, gen(_mergend1) keepus(nd01 nd02)

*Crop information
merge 1:1 hhid07 using b2_ut1.dta, gen(_mergeut1) keepus(ut00a ut07*)

* Park the result in a temporary file for the household-level merge
tempfile book2_clean
save "`book2_clean'", replace

***HH income, assets, etc
* Runs the separate balance-sheet cleaning script and saves whatever dataset it leaves in memory.
* NOTE: this cd moves the working directory away from the raw-data folder set at the top of the file.
* NOTE(review): ptrack.dta is opened by relative path further down - confirm that
* clean_ifls4_balsheet.do leaves the working directory where that file can be found.
cd "$reploc/programs"
do clean_ifls4_balsheet.do
* The tempfile macro is local to this do-file, so it is created here rather than inside the called script
tempfile hhbalsheetfull
save "`hhbalsheetfull'", replace

***Merge all books/information

*Individual
* Book 3A is the base; Book 3B and the cognitive scores are attached on (hhid07, pid07).
* No filtering is done here, so records found in only one source are retained.
use "`book3a_clean'", clear
merge 1:1 hhid07 pid07 using "`book3b_clean'", generate(_mergeb3b)
merge 1:1 hhid07 pid07 using "`bookek_clean'", generate(_mergeek)

tempfile individualdata
save "`individualdata'", replace

*Household
* Book K is the base; Book 2 and the balance sheet are attached on hhid07.
use "`bookk_clean'", clear
merge m:1 hhid07 using "`book2_clean'", generate(_mergeb2)
merge m:1 hhid07 using "`hhbalsheetfull'", generate(_mergebalsheet)
* Only households matched to a balance-sheet record are retained
keep if _mergebalsheet == 3

tempfile hhdata
save "`hhdata'"


*Merge individual and HH
use "`individualdata'", clear

*Drop duplicates
* dup is 0 when a pidlink occurs once and 1, 2, ... within a repeated pidlink,
* so every record of a repeated pidlink is removed (no copy is kept).
quietly bysort pidlink: generate dup = cond(_N==1,0,_n)
drop if dup != 0

***Merge tracking info
* Only individuals found in both the survey data and ptrack.dta are retained
merge 1:1 pidlink using ptrack.dta, generate(_mergeptrack)
keep if _mergeptrack == 3
drop _mergeptrack

* Attach household-level data; only individuals with a matching household record are retained
merge m:1 hhid07 using "`hhdata'", generate(_mergehh)
keep if _mergehh == 3

*Rename variables to know they are IFLS4 variables, except for pidlink, which is unique individual identifier across waves
unab allvars : _all
local idvar pidlink
local wavevars : list allvars - idvar
foreach v of local wavevars {
	rename `v' IFLS4_`v'
}

* Wave indicator; created after the rename so it carries no IFLS4_ prefix
generate ifls = 4


